## Evaluating goodness of fit between ZIP,ZINB, NB, and P models using data with missing values (Figure 4 in Online Appendix)
## and generating predicted count of riots given varying levels of golkarvs (Figure 5 in Online Appendix) 
## Using: "/Users/risatoha/Documents/Political Competition/Revisions/Conditional Accept/BJPS_data_clean_052115.dta"
## Author: RJT 
## Last updated: 01 June 2015

library(foreign)
library(glmmADMB)
library(coefplot2)
dat<- read.dta("/Users/risatoha/Documents/Political Competition/Revisions/Conditional Accept/BJPS_data_clean_052115.dta")
dim(dat)
dat[1:10,]
summary(dat)
## drop unncessary vars, 
nocorrdat<-subset(dat, select=-c(province, district, elyear, nongolkarvs, b4el, yhat, resid, outliers, elf3sq, compcut3, unsfirprov, conflictprov)) 
dim(nocorrdat)
summary(nocorrdat)
nocorrdat[1:10,]

## subset data to sepstrict==0
nosepdat<-subset(nocorrdat, sepstrict==0)
## manually take out NAs
nosepdatna<-na.omit(nosepdat)
## set grouping variables as factors (e.g., dist_code? post_soeharto? urban? java?)
nosepdatna$dist_code <- factor(nosepdatna$dist_code)
## checking how many levels each factor variable has
sapply(nosepdatna[, sapply(nosepdatna, is.factor)], nlevels)

## make sure all numeric vars are numeric
is.numeric(nosepdatna$year)
is.numeric(nosepdatna$securityadj)
is.numeric(nosepdatna$unemp)
is.numeric(nosepdatna$golkarvs)
## make sure all categorical variables are factors
is.factor(nosepdatna$prov_code)
is.factor(nosepdatna$java)
is.factor(nosepdatna$urban)
is.factor(nosepdatna$afterel)

## setting categorical variabls to factors
nosepdatna$dist_code <- factor(nosepdatna$dist_code)
nosepdatna$prov_code <- factor(nosepdatna$prov_code)
nosepdatna$java<-factor(nosepdatna$java)
nosepdatna$afterel<-factor(nosepdatna$afterel)
nosepdatna$urban<-factor(nosepdatna$urban)
nosepdatna$sepstrict<-factor(nosepdatna$sepstrict)
nosepdatna$postsoeharto<-factor(nosepdatna$postsoeharto)
nosepdatna$turnopp<-factor(nosepdatna$turnopp)

save(nosepdatna, file="nosepdatna.Rdata")

## try checking class
sapply(nosepdatna, class)
summary(nosepdatna$java)
list(nosepdatna$postsoeharto) ## postsoeharto is all 1. Take out postsoeharto from model. 
list(nosepdatna$java)
list(nosepdatna$urban)
list(nosepdatna$afterel) ## afterel is also all 1. Take out this variable from the model. 

## Fitting a negative binomial model
fit_nb <- glmmadmb(unkomptemp ~ golkarvs + rel5050gr21 + unkomptemp_1 + securityadj+
                               lpdrbcap + lpop + larea + urban + java + 
                               (1|dist_code),
                             family = "nbinom",
                             zeroInflation = FALSE,
                             data = nosepdatna)

## Check for highly correlated variabls
pairs(nosepdatna)
##another attempt at a scatterplot matrix. 
pairs(~golkarvs+revvotmar+pdipvs+muslimpartyvs2+unkomptemp+unkomptemp_1+java+urban+postsoeharto+lpop+larea+lpdrbcap+rel5050gr21+gol8797_percent+sevviol+riotsdeath, data=nosepdatna,
                  main="Correlation between Variables")

## Fitting a poisson model
fit_poisson<-glmmadmb(unkomptemp ~ golkarvs  + rel5050gr21 + unkomptemp_1 + securityadj+
                        lpdrbcap + lpop + larea + urban + java  + 
                        (1|dist_code),
                      family = "poisson",
                      zeroInflation = FALSE,
                      data = nosepdatna)
# Fitting a zinb model
fit_zinb<-glmmadmb(unkomptemp ~ golkarvs  + rel5050gr21 + unkomptemp_1 + securityadj+
                        lpdrbcap + lpop + larea + urban + java  + 
                        (1|dist_code),
                      family = "nbinom",
                      zeroInflation = TRUE,
                      data = nosepdatna)
## Fitting a zero-inflated poisson model
fit_zipoisson<-glmmadmb(unkomptemp ~ golkarvs  + rel5050gr21 + unkomptemp_1 + securityadj+
                        lpdrbcap + lpop + larea + urban + java + 
                        (1|dist_code),
                      family = "poisson",
                      zeroInflation = TRUE,
                      data = nosepdatna)
## Fitting a zinb1 model.
fit_zinb1<-glmmadmb(unkomptemp ~ golkarvs  + rel5050gr21 + unkomptemp_1 + securityadj+
                     lpdrbcap + lpop + larea + urban + java + 
                     (1|dist_code),
                   family = "nbinom1", zeroInflation = TRUE,
                   data = nosepdatna)
## Get coefficient summaries of each model
summary(fit_poisson) 
# Call:
#   glmmadmb(formula = unkomptemp ~ golkarvs + rel5050gr21 + unkomptemp_1 + 
#              securityadj + lpdrbcap + lpop + larea + urban + java + (1 | 
#                                                                        dist_code), data = nosepdatna, family = "poisson", zeroInflation = FALSE)
# 
# AIC: 236.3 
# 
# Coefficients:
#   Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  -19.42348    4.31120   -4.51  6.6e-06 ***
#   golkarvs       4.48544    1.07650    4.17  3.1e-05 ***
#   rel5050gr21    2.34044    0.85968    2.72  0.00648 ** 
#   unkomptemp_1   0.11177    0.00996   11.22  < 2e-16 ***
#   securityadj    0.00586    0.00198    2.96  0.00307 ** 
#   lpdrbcap       0.51333    0.30732    1.67  0.09485 .  
# lpop           1.00189    0.30387    3.30  0.00098 ***
#   larea          0.09917    0.17663    0.56  0.57448    
# urban          1.25025    0.78948    1.58  0.11327    
# java          -0.82220    0.68776   -1.20  0.23190    
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Number of observations: total=252, dist_code=252 
# Random effect variance(s):
#   Group=dist_code
# Variance    StdDev
# (Intercept) 1.411e-07 0.0003757
# 
# 
# Log-likelihood: -107.173 
summary(fit_nb)

# Call:
#   glmmadmb(formula = unkomptemp ~ golkarvs + rel5050gr21 + unkomptemp_1 + 
#              securityadj + lpdrbcap + lpop + larea + urban + java + (1 | 
#                                                                        dist_code), data = nosepdatna, family = "nbinom", zeroInflation = FALSE)
# 
# AIC: 195 
# 
# Coefficients:
#   Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  -25.91448    7.30890   -3.55  0.00039 ***
#   golkarvs       5.09866    1.72900    2.95  0.00319 ** 
#   rel5050gr21    2.73966    1.33720    2.05  0.04048 *  
#   unkomptemp_1   0.14050    0.03359    4.18  2.9e-05 ***
#   securityadj    0.00543    0.00289    1.88  0.06059 .  
# lpdrbcap       0.51193    0.42858    1.19  0.23229    
# lpop           1.47375    0.52441    2.81  0.00495 ** 
#   larea          0.10469    0.27061    0.39  0.69884    
# urban          1.48066    1.23110    1.20  0.22909    
# java          -1.28011    0.96046   -1.33  0.18259    
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Number of observations: total=252, dist_code=252 
# Random effect variance(s):
#   Group=dist_code
# Variance   StdDev
# (Intercept) 1.222e-06 0.001106
# 
# Negative binomial dispersion parameter: 0.3581 (std. err.: 0.18631)
# 
# Log-likelihood: -85.4989 

summary(fit_zipoisson) 

# Call:
#   glmmadmb(formula = unkomptemp ~ golkarvs + rel5050gr21 + unkomptemp_1 + 
#              securityadj + lpdrbcap + lpop + larea + urban + java + (1 | 
#                                                                        dist_code), data = nosepdatna, family = "poisson", zeroInflation = TRUE)
# 
# AIC: 245.6 
# 
# Coefficients:
#   Estimate Std. Error z value Pr(>|z|)   
# (Intercept)  -19.85379    7.51750   -2.64   0.0083 **
#   golkarvs       3.54830    1.92030    1.85   0.0646 . 
# rel5050gr21    3.26276    1.68660    1.93   0.0530 . 
# unkomptemp_1  -0.11050    0.39448   -0.28   0.7794   
# securityadj    0.00331    0.00284    1.16   0.2446   
# lpdrbcap       0.34060    0.40421    0.84   0.3994   
# lpop           1.11856    0.57431    1.95   0.0515 . 
# larea          0.22180    0.34869    0.64   0.5247   
# urban          1.35111    1.38060    0.98   0.3278   
# java          -1.03862    1.16650   -0.89   0.3733   
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Number of observations: total=252, dist_code=252 
# Random effect variance(s):
#   Group=dist_code
# Variance    StdDev
# (Intercept) 7.554e-07 0.0008691
# 
# Zero-inflation: 0.72026  (std. err.:  0.093457 )
# 
# Log-likelihood: -110.807


summary(fit_zinb)
# 
# #Call:
# glmmadmb(formula = unkomptemp ~ golkarvs + rel5050gr21 + unkomptemp_1 + 
#            securityadj + lpdrbcap + lpop + larea + urban + java + (1 | 
#                                                                      dist_code), data = nosepdatna, family = "nbinom", zeroInflation = TRUE)
# 
# AIC: 247.6 
# 
# Coefficients:
#   Estimate Std. Error z value Pr(>|z|)   
# (Intercept)  -19.85379    7.50960   -2.64   0.0082 **
#   golkarvs       3.55453    1.91800    1.85   0.0638 . 
# rel5050gr21    3.25965    1.68640    1.93   0.0532 . 
# unkomptemp_1  -0.10847    0.39251   -0.28   0.7823   
# securityadj    0.00331    0.00284    1.17   0.2437   
# lpdrbcap       0.34219    0.40424    0.85   0.3973   
# lpop           1.11843    0.57365    1.95   0.0512 . 
# larea          0.22103    0.34885    0.63   0.5263   
# urban          1.34940    1.38080    0.98   0.3284   
# java          -1.03753    1.16440   -0.89   0.3729   
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Number of observations: total=252, dist_code=252 
# Random effect variance(s):
#   Group=dist_code
# Variance    StdDev
# (Intercept) 1.047e-07 0.0003236
# 
# Negative binomial dispersion parameter: 403.43 (std. err.: 4.9017)
# Zero-inflation: 0.71973  (std. err.:  0.093603 )
# 
# Log-likelihood: -110.817 
summary(fit_zinb1)
# 
# Call:
#   glmmadmb(formula = unkomptemp ~ golkarvs + rel5050gr21 + unkomptemp_1 + 
#              securityadj + lpdrbcap + lpop + larea + urban + java + (1 | 
#                                                                        dist_code), data = nosepdatna, family = "nbinom1", zeroInflation = TRUE)
# 
# AIC: 247.6 
# 
# Coefficients:
#   Estimate Std. Error z value Pr(>|z|)   
# (Intercept)  -19.85561    7.51830   -2.64   0.0083 **
#   golkarvs       3.55231    1.92160    1.85   0.0645 . 
# rel5050gr21    3.26182    1.68730    1.93   0.0532 . 
# unkomptemp_1  -0.10977    0.39413   -0.28   0.7806   
# securityadj    0.00331    0.00284    1.17   0.2440   
# lpdrbcap       0.34121    0.40415    0.84   0.3985   
# lpop           1.11853    0.57437    1.95   0.0515 . 
# larea          0.22158    0.34881    0.64   0.5253   
# urban          1.34914    1.38030    0.98   0.3284   
# java          -1.03631    1.16730   -0.89   0.3747   
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Number of observations: total=252, dist_code=252 
# Random effect variance(s):
#   Group=dist_code
# Variance    StdDev
# (Intercept) 2.558e-08 0.0001599
# 
# Negative binomial dispersion parameter: 1.001 (std. err.: 0.00060691)
# Zero-inflation: 0.72006  (std. err.:  0.093579 )
# 
# Log-likelihood: -110.814

## Get AIC values
library(bbmle)
AICtab(fit_poisson, fit_nb, fit_zinb, fit_zipoisson, fit_zinb1)

# dAIC df
# fit_nb         0.0 12
# fit_poisson   41.3 11
# fit_zipoisson 50.6 12
# fit_zinb1     52.6 13
# fit_zinb      52.6 13

## a vector for varnames
vn<- c("Golkar voteshare","Ratio of second to largest\ngroup proportion","Riots in prior year","Security spending","GDP per capita (ln)", "Population (ln)", "Area", "Urban", "Java", "intercept") 
##plot model performance
## in writing this up, say postsoeharto==1, afterel==1, all missing values are omitted, papua and aceh were dropped. 
## glmmadmb
## Figure 4 in Online Appendix
library(coefplot2)
coefplot2(fit_poisson)
coefplot2(fit_nb)
coefplot2(fit_zipoisson)
coefplot2(fit_zinb)
coefplot2(fit_zinb1)
coefplot2(list(P=fit_poisson, 
               NB=fit_nb, 
               ZIP=fit_zipoisson,
               ZINB=fit_zinb, 
               ZINB1=fit_zinb1),  
            main="Coefficient Estimates",
          legend.x="right",
          legend=TRUE,
          legend.args=list(cex = 0.55, lwd = 3)
                )
### save graph as goodnessoffitTOHA in political competition/revisions/graphs folder. 
pdf("goodnessoffit.pdf", height = 4.5, width = 5.5)
dev.off()



### Use the code below to generate Figure 5: Predicted Riots 
## Fitting a negative binomial model
fit_nb <- glmmadmb(unkomptemp ~ golkarvs + rel5050gr21 + unkomptemp_1 + securityadj+
                     lpdrbcap + lpop + larea + urban + java + 
                     (1|dist_code),
                   family = "nbinom",
                   zeroInflation = FALSE,
                   data = nosepdatna)

## generate predicted values
## golkarvs low = 0.05
## golkarvs high =0.95

## Generating predicted count of riots plot, setting everything at the mean.  

load("nosepdatna.rdata")

library(ggplot2)
library(glmmADMB)

fit_nb <- glmmadmb(unkomptemp ~ revvotmar + rel5050gr21 + unkomptemp_1 +
                     securityadj + 
                     lpdrbcap + lpop + larea + urban + java +
                     golkarvs + 
                     (1|dist_code),
                   family = "nbinom",
                   zeroInflation = FALSE,
                   data = nosepdatna)

newdat <- expand.grid(
  golkarvs = seq(0, 1, by = 0.01),
  revvotmar = mean(nosepdatna$revvotmar, na.rm = TRUE),
  rel5050gr21 = mean(nosepdatna$rel5050gr21, na.rm = TRUE),
  unkomptemp_1 = mean(nosepdatna$unkomptemp_1, na.rm = TRUE),
  securityadj = mean(nosepdatna$securityadj, na.rm = TRUE),
  lpdrbcap = mean(nosepdatna$lpdrbcap, na.rm = TRUE),
  lpop = mean(nosepdatna$lpop, na.rm = TRUE),
  larea = mean(nosepdatna$larea, na.rm = TRUE),
  urban = levels(nosepdatna$urban),
  java = levels(nosepdatna$java))

pred <- data.frame(predict(fit_nb, newdata = newdat, type = "response",
                           interval = "confidence"), newdat)
pred_log <- data.frame(predict(fit_nb, newdata = newdat, type = "link",
                               interval = "confidence"), newdat)

pdf("pred_counts.pdf", height = 4.5, width = 5.5)
ggplot(pred, aes(x = golkarvs, y = fit, color = urban, shape = java)) +
  geom_point(size = 1.4) +
  # geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.3, color = NA) +
  labs(x = "Golkar Voteshare", y = "Count of Riots", title = "Predicted Count of Riots") +
  theme_bw()
dev.off()

pdf("pred_log_count.pdf", height = 4.5, width = 5.5)
ggplot(pred_log, aes(x = golkarvs, y = fit, color = urban, shape = java)) +
  geom_point(size = 1.4) +
  # geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.3, color = NA) +
  labs(x = "Golkar Voteshare", y = "Count of Riots (logged)", title = "Predicted Count of Riots (logged)") +
  theme_bw()
dev.off()


